Partie 1 : Analyse descriptive
Unique French names
## Warning: Removed 1 rows containing missing values (geom_path).
Working with department data
=======Project
Hugo Broucke
07/12/2019
Part 1 : Descriptive analysis
Unique French names
Evolution over time
# Convert the national `year` column to a Date set to 31 December of that year.
# Values that are not a valid year become NA.
# The guard keeps the chunk idempotent: without it, running the chunk a second
# time would re-parse the existing dates with a day/month-swapped pattern and
# turn the whole column into NA.
if (!inherits(data_nat_clean$year, "Date")) {
  data_nat_clean$year <- as.Date(
    paste0(data_nat_clean$year, "-12-31"),
    format = "%Y-%m-%d"
  )
}
# Count how many different first names occur in each year
distinct_names <- data_nat_clean %>%
  group_by(year) %>%
  summarise(n_names = n_distinct(firstname))
# Line chart of the yearly number of distinct first names
plot_distinct_names <- ggplot(distinct_names, aes(x = year, y = n_names)) +
  geom_line(size = 1.2, color = "blue") +
  labs(
    title = "Number of French unique names from 1900 to 2018",
    x = "Year",
    y = "Number of unique names"
  ) +
  # centre the title above the panel
  theme(plot.title = element_text(hjust = 0.5))
plot_distinct_names
Difference between one year to another over time
Most popular names for each region
# Build the region dataset: add a `region` column to the department data.
# Each department code is looked up in a code -> region table; codes that are
# not listed get NA.
region <- local({
  region_departments <- list(
    "Ile-de-France" = c("95", "78", "91", "77", "94", "92", "93", "75"),
    "Champagne-Ardenne" = c("08", "51", "10", "52"),
    "Picardie" = c("02", "60", "80"),
    "Haute-normandie" = c("76", "27"),
    "Centre" = c("18", "28", "36", "37", "41", "45"),
    "Basse-Normandie" = c("14", "50", "61"),
    "Bourgogne" = c("21", "58", "71", "89"),
    "Nord-Pas-de-Calais" = c("59", "62"),
    "Lorraine" = c("54", "55", "57", "88"),
    "Alsace" = c("67", "68"),
    "Franche-Comté" = c("25", "39", "70", "90"),
    "Pays de la Loire" = c("44", "49", "53", "72", "85"),
    "Bretagne" = c("22", "29", "35", "56"),
    "Poitou-CharenteS" = c("16", "17", "79", "86"),
    "Aquitaine" = c("24", "33", "40", "47", "64"),
    "Midi-Pyrenées" = c("09", "12", "31", "32", "46", "65", "81", "82"),
    "Limousin" = c("19", "23", "87"),
    "Rhone-Alpes" = c("01", "07", "26", "38", "42", "69", "73", "74"),
    "Auvergne" = c("03", "15", "43", "63"),
    "Languedoc-Roussillon" = c("11", "30", "34", "48", "66"),
    "PACA" = c("04", "05", "06", "13", "83", "84"),
    "Corse" = c("20"),
    "Overseas territories" = c("971", "972", "973", "974")
  )
  # Flatten to a named vector: names are department codes, values are regions
  department_to_region <- setNames(
    rep(names(region_departments), lengths(region_departments)),
    unlist(region_departments, use.names = FALSE)
  )
  data_dpt_clean %>%
    mutate(region = unname(department_to_region[as.character(department)]))
})
# For every year, sex and region, keep the first name(s) with the highest
# regional total; ties are all kept.
table <- region %>%
  group_by(year, sex, region, firstname) %>%
  # regional total of each first name, repeated on every department row
  mutate(number = sum(number)) %>%
  # regrouping replaces the previous grouping
  group_by(year, sex, region) %>%
  select(sex, firstname, year, number, region) %>%
  # collapse the repeated department rows into one row per first name
  distinct() %>%
  filter(number == max(number))

# Split by sex code (1 feeds the boys table, 2 the girls table)
boys <- filter(table, sex == 1)
girls <- filter(table, sex == 2)
Girls names
# Animated bar chart (one frame per year) of the most popular girl first name
# in each region; the first name is printed on its bar.
girls %>%
  plot_ly(
    x = ~region,
    y = ~number,
    frame = ~year,
    text = ~firstname,
    textposition = 'auto',
    # "number" is not a valid hoverinfo flag (allowed: x, y, z, text, name,
    # all, none, skip); the number of births is the y value
    hoverinfo = "y",
    type = 'bar',
    marker = list(color = 'rgb(255,192,203)')
  ) %>%
  layout(
    xaxis = list(title = "French regions"),
    yaxis = list(title = "Number of births"),
    title = list(
      text = "Evolution of the most popular girl French firstname according to the region from 1900 to 2018",
      font = list(size = 14)
    ),
    showlegend = FALSE
  ) %>%
  animation_slider(
    currentvalue = list(font = list(color = "black")),
    # top padding of the slider
    pad = list(t = 130)
  )
Boys names
# Animated bar chart (one frame per year) of the most popular boy first name
# in each region; the first name is printed on its bar.
boys %>%
  plot_ly(
    x = ~region,
    y = ~number,
    frame = ~year,
    text = ~firstname,
    textposition = 'auto',
    # "number" is not a valid hoverinfo flag (allowed: x, y, z, text, name,
    # all, none, skip); the number of births is the y value
    hoverinfo = "y",
    type = 'bar',
    marker = list(color = 'rgb(135,206,235)')
  ) %>%
  layout(
    xaxis = list(title = "French regions"),
    yaxis = list(title = "Number of births"),
    title = list(
      text = "Evolution of the most popular boy French firstname according to the region from 1900 to 2018",
      font = list(size = 14)
    ),
    showlegend = FALSE
  ) %>%
  animation_slider(
    currentvalue = list(font = list(color = "black")),
    # top padding of the slider
    pad = list(t = 130)
  )
Partie 2
# Yearly counts of three first names, plotted around the 1998 football World Cup.
# The whole period is kept; a narrower window was tried and left disabled:
# zinedine <- data_nat_clean %>% filter(year(year) > 1993 & year(year) < 2003)
zinedine <- data_nat_clean
zinedine1 <- zinedine %>%
  filter(firstname %in% c("ZINEDINE", "BIXENTE", "YOURI"))

graph <- ggplot(zinedine1, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years for football player in 1998") +
  xlab("Year") +
  ylab("Number of name") +
  # Constant reference line and label: passed as plain arguments / annotate()
  # so each is drawn once instead of once per data row
  geom_vline(
    xintercept = as.numeric(as.Date("1998-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1998-01-01"), y = 180,
    label = "1998 Football world cup", colour = "black"
  )
graph
#CM18 <- data_nat_clean %>% filter(year(year) > 2010 )
#CM18 <- CM18 %>% filter(firstname == "ANTOINE" | firstname=="KYLIAN" | firstname=="BLAISE" | firstname=="BENJAMIN" | firstname=="SAMUEL" | firstname=="HUGO")
#graph <- ggplot(CM18, aes(x = year, y = number, colour = firstname))+
# geom_line(size=1.5) + ggtitle("Plot of number of name by years for football player IN 2018") +
#xlab("Year") + ylab("Number of name")
#graph
# Yearly counts of three first names from 2005 onwards, with a marker at 2011
# (labelled as season 1 of the series).
got <- data_nat_clean %>%
  filter(firstname %in% c("BRAN", "SANSA", "DAENERYS")) %>%
  filter(year(year) > 2004)

graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with Game of throne") +
  xlab("Year") +
  ylab("Number of name") +
  # Constant reference line and label: passed as plain arguments / annotate()
  # so each is drawn once instead of once per data row
  geom_vline(
    xintercept = as.numeric(as.Date("2011-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2011-01-01"), y = 20,
    label = "Game of throne, season 1", colour = "black"
  )
graph
# Yearly counts (after 1990) of first names associated with four films.
# ARWEN is restricted to sex code "2"; the other names keep every row.
arwen <- data_nat_clean %>%
  filter(firstname == "ARWEN" & sex == "2")
cinema <- data_nat_clean %>%
  filter(firstname %in% c("NEO", "BELLA", "ANAKIN"))
cinema <- bind_rows(arwen, cinema) %>%
  filter(year(year) > 1990)

# Reference lines and labels are constants: they are passed as plain arguments
# / annotate() so each is drawn once instead of once per data row.
graph <- ggplot(cinema, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with the cinema") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("1999-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1999-01-01"), y = 25,
    label = "Matrix", colour = "black", size = 3
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("2009-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2009-01-01"), y = 105,
    label = "Twilight", colour = "black", size = 3
  ) +
  # second label on the 1999 line
  annotate(
    "label",
    x = as.Date("1999-01-01"), y = 100,
    label = "Star Wars: Episode I ", colour = "black", size = 3
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("2001-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("2001-01-01"), y = 175,
    label = "The Lord of the rings ", colour = "black", size = 3
  )
graph
# Yearly counts between 1910 and 1950 of first names linked to the world wars.
# ADOLPHE is restricted to sex code "1" and VICTOIRE to sex code "2"; the other
# names keep every row.
adolphe <- data_nat_clean %>%
  filter(firstname == "ADOLPHE" & sex == "1") %>%
  filter(year(year) > 1909 & year(year) < 1951)
victoire <- data_nat_clean %>%
  filter(firstname == "VICTOIRE" & sex == "2") %>%
  filter(year(year) > 1909 & year(year) < 1951)
ww <- data_nat_clean %>%
  filter(firstname %in% c("JOFFRE", "JOFFRETTE", "ADOLPHINE")) %>%
  filter(year(year) > 1909 & year(year) < 1951)
ww <- bind_rows(ww, adolphe, victoire)

# Reference lines and labels are constants: they are passed as plain arguments
# / annotate() so each is drawn once instead of once per data row.
graph <- ggplot(ww, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1) +
  ggtitle("Plot of number of name by years in link with the world wars") +
  xlab("Year") +
  ylab("Number of name") +
  geom_vline(
    xintercept = as.numeric(as.Date("1914-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1914-01-01"), y = 0,
    label = "Marne's Battle", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1921-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1921-01-01"), y = 550,
    label = "Hitler leader of the NSDAP", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1918-01-01")),
    linetype = "longdash"
  ) +
  annotate(
    "label",
    x = as.Date("1918-01-01"), y = 500,
    label = "End of the World War I", colour = "black", size = 2.5
  ) +
  geom_vline(
    xintercept = as.numeric(as.Date("1945-01-01")),
    linetype = "longdash"
  ) +
  # the label previously read "End of the World II" (missing "War")
  annotate(
    "label",
    x = as.Date("1945-01-01"), y = 200,
    label = "End of the World War II", colour = "black", size = 2.5
  )
graph
# Yearly counts, over the whole period, of first names associated with songs.
# NOTE(review): this reuses (and overwrites) the `got` variable of the
# Game of Thrones plot; the name is kept so later code that reads `got` still
# sees the same data.
got <- data_nat_clean %>%
  filter(firstname %in% c("FELICIE", "ROXANNE", "OSCAR", "LOLITA"))

graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  # the title was copy-pasted from the Game of Thrones plot; these are song names
  ggtitle("Plot of number of name by years in link with music") +
  xlab("Year") +
  ylab("Number of name")
graph
# Convert the department-level `year` column to a Date set to 31 December of
# that year. Values that are not a valid year become NA.
# The guard keeps the chunk idempotent: without it, running the chunk a second
# time would re-parse the existing dates with a day/month-swapped pattern and
# turn the whole column into NA.
if (!inherits(data_dpt_clean$year, "Date")) {
  data_dpt_clean$year <- as.Date(
    paste0(data_dpt_clean$year, "-12-31"),
    format = "%Y-%m-%d"
  )
}
# Department data for 1990-2012 with the department code replaced by its
# region name; the column is then renamed `Region`. Rows stay at department
# level, and codes that are not listed get NA.
dep <- local({
  region_departments <- list(
    "Ile-de-France" = c("95", "78", "91", "77", "94", "92", "93", "75"),
    "Champagne-Ardenne" = c("08", "51", "10", "52"),
    "Picardie" = c("02", "60", "80"),
    "Haute-normandie" = c("76", "27"),
    "Centre" = c("18", "28", "36", "37", "41", "45"),
    "Basse-Normandie" = c("14", "50", "61"),
    "Bourgogne" = c("21", "58", "71", "89"),
    "Nord-Pas-de-Calais" = c("59", "62"),
    "Lorraine" = c("54", "55", "57", "88"),
    "Alsace" = c("67", "68"),
    "Franche-Comté" = c("25", "39", "70", "90"),
    "Pays de la Loire" = c("44", "49", "53", "72", "85"),
    "Bretagne" = c("22", "29", "35", "56"),
    "Poitou-CharenteS" = c("16", "17", "79", "86"),
    "Aquitaine" = c("24", "33", "40", "47", "64"),
    "Midi-Pyrenées" = c("09", "12", "31", "32", "46", "65", "81", "82"),
    "Limousin" = c("19", "23", "87"),
    "Rhone-Alpes" = c("01", "07", "26", "38", "42", "69", "73", "74"),
    "Auvergne" = c("03", "15", "43", "63"),
    "Languedoc-Roussillon" = c("11", "30", "34", "48", "66"),
    # NOTE(review): trailing space kept as in the original label; presumably
    # it has to match the spelling used in `eco$Region` - confirm
    "PACA " = c("04", "05", "06", "13", "83", "84"),
    "Corse" = c("20"),
    "Overseas territories" = c("971", "972", "973", "974")
  )
  # Flatten to a named vector: names are department codes, values are regions
  department_to_region <- setNames(
    rep(names(region_departments), lengths(region_departments)),
    unlist(region_departments, use.names = FALSE)
  )
  data_dpt_clean %>%
    filter(year(year) >= 1990 & year(year) <= 2012) %>%
    mutate(
      department = unname(department_to_region[as.character(department)])
    ) %>%
    rename(Region = department)
})
# Most popular boy and girl first names per region in 1990, joined with the
# regional figures in `eco` (its `X1990` column is what the plots present as
# GDP).
eco1990boy <- dep %>%
  filter(sex == 1, year(year) == 1990) %>%
  # `dep` has one row per department: add up the departments of a region
  # before ranking, otherwise the winner is the largest single-department count
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco1990boy <- merge(eco1990boy, eco, by = "Region")

eco1990girl <- dep %>%
  filter(sex == 2, year(year) == 1990) %>%
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco1990girl <- merge(eco1990girl, eco, by = "Region")

eco1990 <- bind_rows(eco1990boy, eco1990girl)

# Axis labels now match the mapped variables (first name on x, GDP on y)
graph <- ggplot(eco1990boy, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 1990 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# The girls plot previously reused the boys title
graph <- ggplot(eco1990girl, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 1990 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph
# Most popular boy and girl first names per region in 2000, joined with the
# regional figures in `eco` (its `X2000` column is what the plots present as
# GDP).
eco2000boy <- dep %>%
  filter(sex == 1, year(year) == 2000) %>%
  # `dep` has one row per department: add up the departments of a region
  # before ranking, otherwise the winner is the largest single-department count
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2000boy <- merge(eco2000boy, eco, by = "Region")

eco2000girl <- dep %>%
  filter(sex == 2, year(year) == 2000) %>%
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2000girl <- merge(eco2000girl, eco, by = "Region")

eco2000 <- bind_rows(eco2000boy, eco2000girl)

# Axis labels now match the mapped variables (first name on x, GDP on y)
graph <- ggplot(eco2000boy, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2000 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# The girls plot previously reused the boys title
graph <- ggplot(eco2000girl, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2000 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph
# Most popular boy and girl first names per region in 2010, joined with the
# regional figures in `eco` (its `X2010` column is what the plots present as
# GDP).
eco2010boy <- dep %>%
  filter(sex == 1, year(year) == 2010) %>%
  # `dep` has one row per department: add up the departments of a region
  # before ranking, otherwise the winner is the largest single-department count
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2010boy <- merge(eco2010boy, eco, by = "Region")

eco2010girl <- dep %>%
  filter(sex == 2, year(year) == 2010) %>%
  group_by(Region, sex, year, firstname) %>%
  summarise(number = sum(number)) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2010girl <- merge(eco2010girl, eco, by = "Region")

eco2010 <- bind_rows(eco2010boy, eco2010girl)

# Axis labels now match the mapped variables (first name on x, GDP on y)
graph <- ggplot(eco2010boy, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2010 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# The girls plot previously reused the boys title
graph <- ggplot(eco2010girl, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2010 by Region and GDP") +
  xlab("Firstname") +
  ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph
The popularity of French names
In this part we will study which events have an impact on the popularity of French first names.
French names and sport
In this part we will study the impact of sport on the popularity of French first names. To illustrate this, we chose to study the most famous sporting event in France: the 1998 World Cup, which France won for the first time.
In this graph, we see that the names of three players of the French team at the 1998 World Cup show a peak in popularity after France's victory. We can therefore assume that major sporting events have an impact on French first names.
French names and series
The next graph highlights the impact of TV series on French first names. To illustrate this, we chose one of the best-known series in France: Game of Thrones. We look at the names of the famous characters Bran, Daenerys and Sansa.
We even observe that the names Daenerys and Sansa appeared just after the series was first broadcast.
French names and films
In the same spirit as the previous part, we will now observe the impact of films on French first names. For that we selected the film The Matrix with the character Neo, Star Wars: Episode I with the character Anakin, The Lord of the Rings with Arwen, and finally Twilight with the name Bella. Apart from the name Bella, these names appeared in France just after the release of these films. The cinema therefore has an impact on French first names.
French names and world wars
Among the most tragic events in recent French history are the two world wars, so we decided to look at the impact of these wars on French first names. One of the heroes of the First World War is Marshal Joffre, who won the Battle of the Marne. We can see that this victory had an impact on French first names, since the name Joffre and its feminine form Joffrette show a peak in popularity just after this battle. When we look at the name Victoire, which means victory in French, there are two peaks, in 1918 and 1945, the years in which the two wars ended. On the other hand, when we look at the name Adolphe and its feminine derivative Adolphine, we see that their popularity plummets when Adolf Hitler comes to power in Germany.
French names and music
In this part, we will study the impact of music on French names. We selected three songs for this. The first is Roxanne by The Police: released in 1978, this song increased the popularity of the name Roxanne in France. Similarly, the song Oscar by Renaud, released in 1981, made the popularity of the name Oscar explode. Finally, the song Moi… Lolita by Alizée created a new peak in popularity for the name Lolita.
French names and economy
In this part we will examine whether there is a correlation between French first names and the economy of the regions. For that we studied the names most often given to boys and to girls in 1990, 2000 and 2010 in each French region.
In 1990, we notice that Île-de-France, the richest region, has a most popular name that differs from those of the other French regions. This is the case for both male and female first names.
The same phenomenon is observed in 2000.
In 2010, the richest region still has different names from the other regions. With these observations, it is difficult to say that the economy has an impact on French first names: although we see a difference in first names for the richest region, we do not observe any difference for the poorest region.